{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Tensorflow读取数据的Dataset API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. 读取数据构造Dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 读取列表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<TensorSliceDataset shapes: (2,), types: tf.int32>"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datas = [[1, 2], [3, 4], [5, 6]]\n",
    "dataset = tf.data.Dataset.from_tensor_slices(datas)\n",
    "dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf.Tensor([1 2], shape=(2,), dtype=int32)\n",
      "tf.Tensor([3 4], shape=(2,), dtype=int32)\n",
      "tf.Tensor([5 6], shape=(2,), dtype=int32)\n"
     ]
    }
   ],
   "source": [
    "for row in dataset:\n",
    "    print(row)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 读取字典"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<TensorSliceDataset shapes: {a: (), b: (), c: ()}, types: {a: tf.int32, b: tf.int32, c: tf.int32}>"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datas = {\"a\": [1, 2, 4], \"b\": [4, 5, 6], \"c\": [7,8,9]}\n",
    "dataset = tf.data.Dataset.from_tensor_slices(datas)\n",
    "dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'a': 1, 'b': 4, 'c': 7}\n",
      "{'a': 2, 'b': 5, 'c': 8}\n",
      "{'a': 4, 'b': 6, 'c': 9}\n"
     ]
    }
   ],
   "source": [
    "for data in dataset.as_numpy_iterator():\n",
    "    print(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 读取元组"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<TensorSliceDataset shapes: ((3,), ()), types: (tf.int32, tf.int32)>"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "features = [\n",
    "    [1,2,3],\n",
    "    [4,5,6],\n",
    "    [7,8,9]\n",
    "]\n",
    "labels = [1, 0, 1]\n",
    "\n",
    "dataset = tf.data.Dataset.from_tensor_slices((features, labels))\n",
    "dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(array([1, 2, 3]), 1)\n",
      "(array([4, 5, 6]), 0)\n",
      "(array([7, 8, 9]), 1)\n"
     ]
    }
   ],
   "source": [
    "for data in dataset.as_numpy_iterator():\n",
    "    print(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2. 对Dataset执行各种转换"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "datas = list(range(20))\n",
    "dataset = tf.data.Dataset.from_tensor_slices(datas)\n",
    "list(dataset.as_numpy_iterator())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### map操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset_map = dataset.map(lambda x : x+1)\n",
    "list(dataset_map.as_numpy_iterator())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### batch操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([0, 1, 2]),\n",
       " array([3, 4, 5]),\n",
       " array([6, 7, 8]),\n",
       " array([ 9, 10, 11]),\n",
       " array([12, 13, 14]),\n",
       " array([15, 16, 17]),\n",
       " array([18, 19])]"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataset_batch = dataset.batch(3)\n",
    "list(dataset_batch.as_numpy_iterator())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### repeat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_repeat = dataset.repeat(3)\n",
    "list(dataset_batch.as_numpy_iterator())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
